#-*- coding:utf-8 -*-
import io

from scrapy.selector import Selector
from scrapy.spiders import CrawlSpider,Request

from data5u.items import Data5UItem

class Data5u(CrawlSpider):
    """Spider that dumps a fixed grid of cells from the data5u.com front page.

    A single request is issued for ``start_url``; the response is handled by
    :meth:`data5u_parse`, which appends one line of space-separated cell
    values per table row to ``item.json`` in the working directory.  Despite
    the file name, the output is plain text, not JSON.
    """
    name='Data5uSpiders'
    allowed_domains=[]
    start_url='http://www.data5u.com/'

    # Absolute XPath of one table cell; filled in with (row, column).
    _CELL_XPATH = "/html/body/div[4]/li[2]/ul[%d]/span[%d]/li"
    # 1-based ``ul`` indexes visited as rows (2..21) and ``span`` indexes
    # visited as columns (1..9).
    _ROWS = range(2, 22)
    _COLUMNS = range(1, 10)
    # Column whose first two link texts are concatenated into one value.
    # NOTE(review): presumably a two-part field such as province + city;
    # confirm against the live page markup.
    _MERGED_COLUMN = 6
    _OUTPUT_PATH = 'item.json'

    def start_requests(self):
        """Yield the single request for ``start_url``, routed to data5u_parse."""
        yield Request(self.start_url,self.data5u_parse)

    def data5u_parse(self,response):
        """Extract every configured row from *response* and append it to disk.

        Each value is written followed by one space, and each row is
        terminated by a newline.  Nothing is returned.
        """
        sel = Selector(response)
        rows = [self._extract_row(sel, row) for row in self._ROWS]

        # io.open with an explicit encoding accepts text on both Python 2 and
        # Python 3, and the ``with`` block guarantees the file is flushed and
        # closed even if a write fails.
        with io.open(self._OUTPUT_PATH, 'a', encoding='utf-8') as out:
            for cells in rows:
                out.write(u''.join(cell + u' ' for cell in cells) + u'\n')

    def _extract_row(self, sel, row):
        """Return the list of text values found in table row *row*.

        For every column the cell's direct text nodes are collected first,
        followed by exactly one link-derived value: the first link text, or
        the first two link texts concatenated for ``_MERGED_COLUMN``, or an
        empty string when the cell contains no link text.
        """
        cells = []
        for column in self._COLUMNS:
            cell_path = self._CELL_XPATH % (row, column)
            cells.extend(sel.xpath(cell_path + '/text()').extract())

            # ``//a`` matches links at any depth below the cell.
            links = sel.xpath(cell_path + '//a/text()').extract()
            if column == self._MERGED_COLUMN:
                # Slicing tolerates a cell with a single link (or none)
                # instead of raising IndexError.
                cells.append(u''.join(links[:2]))
            else:
                cells.append(links[0] if links else u'')
        return cells
                
#         item['IP'].remove(item['IP'][0])
#         item['Port'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[2]/li/text()').extract()
#         item['Port'].remove(item['Port'][0])
#         item['Anonymity'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[3]/li/a/text()').extract()
#         item['Type'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[4]/li/a/text()').extract()
#         item['Country'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[5]/li/a/text()').extract()
#         item['PaC'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[6]/li/a/text()').extract()
#         item['Operator'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[7]/li/a/text()').extract()
#         item['Speed'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[8]/li/text()').extract()
#         item['Speed'].remove(item['Speed'][0])
#         item['Time'] = sel.xpath('/html/body/div[4]/li[2]/ul/span[9]/li/text()').extract()
#         item['Time'].remove(item['Time'][0])
#         for aa in item['IP']:
#             print aa
#         items.append(item)
#         return items
